pacman::p_load(rstudioapi, tidyverse, data.table, broom, plyr)
rm(list=ls())
# Infix "not in": TRUE for each element of x that does not occur in table.
`%!in%` <- function(x, table) {
  !(x %in% table)
}
################################################################################

################################## Labor endowment

####### Brazil

### Agricultural employment
# Geographic-unit lookup for Brazil; joined on county_id at the end of this step.
df_u <- fread("../../data/landuse/clean/geographicunits_brazil.csv.gz")

# Employment records; rows with activity_type "EAP" are excluded here.
df <- fread("../../data/factors/clean/employment_brazil.csv.gz")
df <- df[activity_type != "EAP"]

# Put the ties == "all" and ties == "only_family" figures side by side, one
# pairing per county_id x producer_type x activity_type.
df <- merge(
  df[ties == "all"],
  df[ties == "only_family"],
  by = c("county_id", "producer_type", "activity_type")
)

# Drop the duplicated ties/year columns and give the two employment columns
# meaningful names.
df <- df[, !c("ties.x", "ties.y", "year.x", "year.y")]
setnames(
  df,
  old = c("employment.x", "employment.y"),
  new = c("employment", "employment_family")
)

# Non-family employment is total employment minus family employment.
df[, employment_non_family := employment - employment_family]

# Attach the geographic identifiers of each county.
df_full <- merge(df, df_u, by = "county_id")

# Keep only the producer-type / activity-type categories listed here.
list_producer_type <- c("total")
list_activity_type <- c("total")

# Both filters applied in one pass; %in% never yields NA, so this matches
# filtering on producer type first and activity type second.
df_subset <- df_full[
  producer_type %in% list_producer_type & activity_type %in% list_activity_type
]
df <- df_subset

# Employment totals per state/region, plus the non-family share of employment.
df_br_state <- df_subset[
  ,
  lapply(.SD, sum, na.rm = TRUE),
  by = .(state, region),
  .SDcols = c("employment", "employment_family", "employment_non_family")
]
df_br_state[, share_non_family := employment_non_family / employment]
df <- df_br_state

# Country-wide employment totals, expressed in millions, plus the non-family
# share (computed from the scaled totals).
df <- df_subset[
  ,
  lapply(.SD, function(col) sum(col, na.rm = TRUE) / 1000000),
  .SDcols = c("employment", "employment_family", "employment_non_family")
]
df[, share_non_family := employment_non_family / employment]
df_br_total <- df

# Final county-level agricultural employment table for Brazil: the subset's
# employment column is exposed under the name employment_agriculture.
df_subset[, employment_agriculture := employment]
df_br_ag <- df_subset[
  ,
  .(
    country, county_id, amc_id, microregion_id, mesoregion_id,
    state_id, region, employment_agriculture
  )
]

### Economically active population
# Only the "EAP" rows of the employment file are used here; their employment
# figure becomes employment_all.
df <- fread("../../data/factors/clean/employment_brazil.csv.gz")
df <- df[activity_type == "EAP"]
df[, employment_all := employment]

# Inner join on county_id: counties missing from either side are dropped.
# NOTE(review): if the EAP rows hold more than one record per county, this
# merge repeats the agricultural rows -- confirm the file is unique by county.
df_br <- merge(
  df_br_ag,
  df[, .(county_id, employment_all)],
  by = "county_id"
)


####### Argentina

# Argentina's employment file is used as-is: every column selected below
# (country, state_id, region and both employment measures) comes straight from
# it, so no geographic-units lookup is read or merged in this step.
df <- fread("../../data/factors/clean/employment_argentina.csv.gz")

# The county id is reused for the AMC / microregion / mesoregion ids so that
# the Argentine table has the same columns as df_br and the two can be stacked.
df[
  ,
  c("amc_id", "microregion_id", "mesoregion_id") :=
    list(county_id, county_id, county_id)
]
df_ar <- df[
  ,
  list(
    country, county_id, amc_id, microregion_id, mesoregion_id,
    state_id, region, employment_agriculture, employment_all
  )
]

# Stack Argentina on top of Brazil and write the combined table as gzipped CSV.
df <- rbind(df_ar, df_br)
write.csv(
  df,
  gzfile("../../data/factors/clean/employment.csv.gz"),
  row.names = FALSE
)




################################## Outside option wages

####### Argentina

# Mean of ars_usd_rate over the rows with year == 2014; used below to convert
# peso wages to USD.
df <- fread("../../data/prices/clean/ars_usd_rate_da.csv.gz")
ars_usd <- df[year == 2014, mean(ars_usd_rate)]

# Raw wage file; the columns are renamed positionally below, after the derived
# year column has been appended.
df <- fread("../../data/factors/raw/w_mean_depto_total_clae2.csv")

# Take the year from the leading four characters of fecha instead of parsing
# the full date. The earlier "%Y/%d/%m" parse only worked when fread had
# already typed the column as a date, or when the text happened to match that
# day-before-month layout; an ISO "YYYY-MM-DD" string became NA and silently
# emptied the 2014 filter below.
df[, year := as.integer(substr(as.character(fecha), 1L, 4L))]
setnames(
  df,
  new = c("date", "county_id_indec", "state_id_indec", "clae2", "wage_nonag", "year")
)

# Keep 2014 and drop sector codes 1-3 (exclude agricultural sector wages).
df <- df[year == 2014, ]
df <- df[clae2 %!in% c(1, 2, 3), ]

# Average across the remaining sectors within each county, then keep only
# counties whose average is strictly positive.
# NOTE(review): the > 0 filter runs after averaging, so any non-positive
# placeholder values in the raw data (if the source uses them for suppressed
# cells) still enter the mean -- confirm against the data dictionary.
df <- df[
  ,
  list(wage_nonag = mean(wage_nonag, na.rm = TRUE)),
  by = list(county_id_indec, state_id_indec)
][wage_nonag > 0, ]

# Multiply by 12 and convert from pesos to USD at the 2014 mean rate.
df[, wage_nonag := 12 * (wage_nonag / ars_usd)]
df_w <- df

# Code dictionary; its columns are renamed positionally.
df_id <- fread("../../data/factors/raw/diccionario_cod_depto.csv")
setnames(
  df_id,
  new = c("county_id_indec", "county_indec", "state_id_indec", "state_indec")
)

# Attach county/state names to the wage table.
df <- merge(df_w, df_id, by = c("county_id_indec", "state_id_indec"))

# Build the project-wide county_id: "AR0" + code, except Buenos Aires rows,
# which get the prefix "AR00".
df[, county_id_indec := as.character(county_id_indec)]
df[
  ,
  county_id := paste0(
    ifelse(state_indec %in% "Buenos Aires", "AR00", "AR0"),
    county_id_indec
  )
]

# Two ids are recoded by hand.
df[county_id == "AR006217", county_id := "AR006218"]
df[county_id == "AR02000", county_id := "AR002007"]

# Geographic-unit lookup for Argentina, restricted to the columns needed here.
df_u <- fread("../../data/landuse/clean/geographicunits_argentina.csv.gz")
df_u <- df_u[, list(country, county_id, county, state_id, state, region)]

# Left join: every wage row is kept. Rows whose county_id has no match in the
# lookup get NA for country/state_id/region and are still carried forward.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <- merge(df, df_u, by = "county_id", all.x = TRUE)

# The county id is reused for the AMC / microregion / mesoregion ids so that
# the Argentine table has the same columns as the Brazilian one.
df[
  ,
  c("amc_id", "microregion_id", "mesoregion_id") :=
    list(county_id, county_id, county_id)
]
df_ar <- df[
  ,
  list(
    country, county_id, amc_id, microregion_id, mesoregion_id,
    state_id, region, wage_nonag
  )
]


####### Brazil

# Mean of real_usd_rate over the rows with year == 2010.
df <- fread("../../data/prices/clean/fred_real_usd_rate.csv.gz")
real_usd <- df[year == 2010, mean(real_usd_rate)]

# wage_nonag = 12 * income_urban converted to USD at that rate.
df <- fread("../../data/factors/clean/wages_brazil.csv.gz")
set(df, j = "wage_nonag", value = 12 * (df[["income_urban"]] / real_usd))

# Attach geographic identifiers (inner join on county_id) and keep the shared
# output columns.
df_u <- fread("../../data/landuse/clean/geographicunits_brazil.csv.gz")
df <- merge(df, df_u, by = "county_id")
df_br <- df[
  ,
  .(
    country, county_id, amc_id, microregion_id, mesoregion_id,
    state_id, region, wage_nonag
  )
]

# Stack Argentina on top of Brazil and write the combined wage table as
# gzipped CSV.
df <- rbind(df_ar, df_br)
write.csv(
  df,
  file = gzfile("../../data/factors/clean/wages_nonag.csv.gz"),
  row.names = FALSE
)


